Q8_Q8_1 is general programming skills
# Load the input CSV and keep only rows with no missing value in any column.
df <- read.csv(
  "input/commits_novelty.csv",
  header = TRUE,
  sep = ","
)
df <- df[complete.cases(df), ]
df

# Treat the group identifier as a categorical variable.
df$group <- factor(df$group)

# Derive log(x + 1) versions of similarity and count.
df$log_relational_novelty <- log(df$similarity + 1)
df$log_count <- log(df$count + 1)
df

# Standardize the skill/aspiration items together with both log-transformed
# variables, so every later coefficient is on a standardized scale.
cols <- c(
  "Q7_Q7_1", "Q7_Q7_2", "Q8_Q8_1", "Q10",
  "log_relational_novelty", "log_count"
)
df[cols] <- scale(df[cols])
df

# Does the standardized log_count differ between groups?
mod <- lm(log_count ~ factor(group), data = df)
summary(mod)
Call:
lm(formula = log_count ~ factor(group), data = df)
Residuals:
Min 1Q Median 3Q Max
-1.1938 -0.9742 -0.1165 0.5462 3.4873
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.07037 0.08193 -0.859 0.3907
factor(group)1 0.21956 0.11368 1.931 0.0539 .
factor(group)2 -0.06328 0.11773 -0.537 0.5911
factor(group)3 0.10161 0.11301 0.899 0.3689
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9968 on 607 degrees of freedom
Multiple R-squared: 0.01137, Adjusted R-squared: 0.006485
F-statistic: 2.327 on 3 and 607 DF, p-value: 0.07358
# Outcome regressed on the four standardized Q* covariates only.
mod <- lm(
  log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.1587 -0.4105 0.3565 0.7363 1.3073
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.293e-16 4.006e-02 0.000 1.00000
Q10 1.887e-02 4.252e-02 0.444 0.65747
Q8_Q8_1 8.371e-02 4.447e-02 1.882 0.06026 .
Q7_Q7_1 -7.379e-02 4.973e-02 -1.484 0.13836
Q7_Q7_2 1.383e-01 5.077e-02 2.725 0.00662 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9901 on 606 degrees of freedom
Multiple R-squared: 0.02607, Adjusted R-squared: 0.01964
F-statistic: 4.055 on 4 and 606 DF, p-value: 0.002976
# Outcome regressed on log_count alone.
mod <- lm(
  log_relational_novelty ~ log_count,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ log_count, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2833 -0.4112 0.2627 0.6549 1.4603
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.026e-16 3.743e-02 0.00 1
log_count 3.816e-01 3.746e-02 10.19 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9251 on 609 degrees of freedom
Multiple R-squared: 0.1456, Adjusted R-squared: 0.1442
F-statistic: 103.8 on 1 and 609 DF, p-value: < 2.2e-16
# Full additive model: group plus log_count and all four Q* covariates.
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2745 -0.4240 0.2415 0.6170 1.4761
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17463 0.07542 -2.315 0.02093 *
factor(group)1 0.13181 0.10606 1.243 0.21441
factor(group)2 0.24455 0.10863 2.251 0.02474 *
factor(group)3 0.31475 0.10396 3.028 0.00257 **
log_count 0.37561 0.03755 10.003 < 2e-16 ***
Q7_Q7_1 -0.05097 0.04623 -1.102 0.27070
Q7_Q7_2 0.12593 0.04732 2.661 0.00799 **
Q8_Q8_1 0.05415 0.04118 1.315 0.18902
Q10 -0.02165 0.04012 -0.540 0.58955
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9124 on 602 degrees of freedom
Multiple R-squared: 0.1784, Adjusted R-squared: 0.1675
F-statistic: 16.34 on 8 and 602 DF, p-value: < 2.2e-16
# Stage nested within group, plus the covariates of the full additive model.
# NOTE: stage has not been converted to a factor at this point, so it enters
# as a numeric slope estimated separately for each group (the summary reports
# one `factor(group)k:stage` coefficient per group).
mod <- lm(
  log_relational_novelty ~ factor(group) / stage + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group)/stage + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3576 -0.4230 0.2610 0.6157 1.6005
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.40408 0.18391 -2.197 0.02840 *
factor(group)1 0.26077 0.25526 1.022 0.30739
factor(group)2 0.09840 0.28683 0.343 0.73169
factor(group)3 0.43213 0.25337 1.706 0.08861 .
log_count 0.38321 0.03765 10.177 < 2e-16 ***
Q7_Q7_1 -0.05030 0.04616 -1.090 0.27624
Q7_Q7_2 0.12720 0.04724 2.693 0.00729 **
Q8_Q8_1 0.05223 0.04112 1.270 0.20454
Q10 -0.02287 0.04005 -0.571 0.56822
factor(group)0:stage 0.09193 0.06719 1.368 0.17174
factor(group)1:stage 0.03997 0.06441 0.621 0.53515
factor(group)2:stage 0.13794 0.07560 1.825 0.06856 .
factor(group)3:stage 0.04473 0.06362 0.703 0.48225
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9109 on 598 degrees of freedom
Multiple R-squared: 0.1866, Adjusted R-squared: 0.1703
F-statistic: 11.43 on 12 and 598 DF, p-value: < 2.2e-16
# Group x stage interaction (main effects plus interaction terms).
# With stage still numeric this is a reparameterization of the nested
# `factor(group) / stage` fit: residuals and R-squared are the same, but the
# stage slopes are expressed relative to group 0 instead of per group.
mod <- lm(
  log_relational_novelty ~ factor(group) * stage + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) * stage +
log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3576 -0.4230 0.2610 0.6157 1.6005
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.40408 0.18391 -2.197 0.02840 *
factor(group)1 0.26077 0.25526 1.022 0.30739
factor(group)2 0.09840 0.28683 0.343 0.73169
factor(group)3 0.43213 0.25337 1.706 0.08861 .
stage 0.09193 0.06719 1.368 0.17174
log_count 0.38321 0.03765 10.177 < 2e-16 ***
Q7_Q7_1 -0.05030 0.04616 -1.090 0.27624
Q7_Q7_2 0.12720 0.04724 2.693 0.00729 **
Q8_Q8_1 0.05223 0.04112 1.270 0.20454
Q10 -0.02287 0.04005 -0.571 0.56822
factor(group)1:stage -0.05196 0.09304 -0.558 0.57671
factor(group)2:stage 0.04601 0.10094 0.456 0.64869
factor(group)3:stage -0.04720 0.09254 -0.510 0.61023
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9109 on 598 degrees of freedom
Multiple R-squared: 0.1866, Adjusted R-squared: 0.1703
F-statistic: 11.43 on 12 and 598 DF, p-value: < 2.2e-16
# Model retained by the stepwise search: group, log_count and Q7_Q7_2.
library(stats)
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1984 -0.4152 0.2400 0.6402 1.5394
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17752 0.07523 -2.360 0.01861 *
factor(group)1 0.12829 0.10455 1.227 0.22027
factor(group)2 0.25395 0.10827 2.345 0.01932 *
factor(group)3 0.32098 0.10365 3.097 0.00205 **
log_count 0.37833 0.03716 10.180 < 2e-16 ***
Q7_Q7_2 0.10987 0.03713 2.959 0.00320 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9122 on 605 degrees of freedom
Multiple R-squared: 0.1747, Adjusted R-squared: 0.1679
F-statistic: 25.61 on 5 and 605 DF, p-value: < 2.2e-16
AIC(mod)
[1] 1629.635
BIC(mod)
[1] 1660.541
# Comparison model: all covariates kept, factor(group) left out.
library(stats)
mod <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3239 -0.3892 0.2683 0.6427 1.5709
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.517e-16 3.714e-02 0.000 1.00000
log_count 3.765e-01 3.764e-02 10.003 < 2e-16 ***
Q7_Q7_1 -5.352e-02 4.615e-02 -1.160 0.24666
Q7_Q7_2 1.358e-01 4.707e-02 2.885 0.00406 **
Q8_Q8_1 6.056e-02 4.129e-02 1.467 0.14301
Q10 -3.134e-02 3.974e-02 -0.788 0.43073
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9179 on 605 degrees of freedom
Multiple R-squared: 0.1643, Adjusted R-squared: 0.1574
F-statistic: 23.78 on 5 and 605 DF, p-value: < 2.2e-16
AIC(mod)
[1] 1637.293
BIC(mod)
[1] 1668.199
library(stats)
# mod.1: reduced model without the group factor (log_count + Q7_Q7_2 only).
mod.1 <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_2,
  data = df
)
summary(mod.1)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2448 -0.4150 0.2665 0.6621 1.5082
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.442e-16 3.715e-02 0.000 1.00000
log_count 3.781e-01 3.719e-02 10.164 < 2e-16 ***
Q7_Q7_2 1.184e-01 3.719e-02 3.184 0.00153 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9182 on 608 degrees of freedom
Multiple R-squared: 0.1596, Adjusted R-squared: 0.1568
F-statistic: 57.74 on 2 and 608 DF, p-value: < 2.2e-16
AIC(mod.1)
[1] 1634.693
BIC(mod.1)
[1] 1652.353
library(stats)
# mod.2: same predictors as mod.1 with the group factor added.
mod.2 <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod.2)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1984 -0.4152 0.2400 0.6402 1.5394
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17752 0.07523 -2.360 0.01861 *
factor(group)1 0.12829 0.10455 1.227 0.22027
factor(group)2 0.25395 0.10827 2.345 0.01932 *
factor(group)3 0.32098 0.10365 3.097 0.00205 **
log_count 0.37833 0.03716 10.180 < 2e-16 ***
Q7_Q7_2 0.10987 0.03713 2.959 0.00320 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9122 on 605 degrees of freedom
Multiple R-squared: 0.1747, Adjusted R-squared: 0.1679
F-statistic: 25.61 on 5 and 605 DF, p-value: < 2.2e-16
AIC(mod.2)
[1] 1629.635
BIC(mod.2)
[1] 1660.541
# F-test between the nested models mod.1 (without group) and mod.2 (with
# factor(group)): tests whether adding the group factor improves the fit.
# In the recorded run the models differ significantly at the 5% level.
anova(mod.1, mod.2)
Analysis of Variance Table
Model 1: log_relational_novelty ~ log_count + Q7_Q7_2
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2
Res.Df RSS Df Sum of Sq F Pr(>F)
1 608 512.64
2 605 503.44 3 9.1943 3.683 0.01195 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
Loading required package: leaps
Loading required package: SuppDists
Loading required package: car
Loading required package: carData
# AIC-based stepwise selection starting from the full additive model.
# step() has no `method` argument: the former `method = "both"` was absorbed
# by `...` and ignored, so the search ran with the default direction for a
# missing scope (backward only). `direction = "both"` requests the intended
# search, in which a dropped term may be re-added at a later step.
step(
  lm(
    log_relational_novelty ~ factor(group) + log_count +
      Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
    data = df
  ),
  direction = "both",
  trace = 1
)
Start: AIC=-103.05
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q10 1 0.243 501.43 -104.753
- Q7_Q7_1 1 1.012 502.20 -103.816
- Q8_Q8_1 1 1.440 502.63 -103.296
<none> 501.19 -103.049
- factor(group) 3 8.603 509.79 -98.650
- Q7_Q7_2 1 5.897 507.08 -97.902
- log_count 1 83.308 584.50 -11.096
Step: AIC=-104.75
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1
Df Sum of Sq RSS AIC
- Q7_Q7_1 1 1.022 502.45 -105.509
- Q8_Q8_1 1 1.258 502.69 -105.222
<none> 501.43 -104.753
- factor(group) 3 8.884 510.31 -100.022
- Q7_Q7_2 1 5.680 507.11 -99.870
- log_count 1 83.348 584.78 -12.801
Step: AIC=-105.51
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2 +
Q8_Q8_1
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 0.989 503.44 -106.308
<none> 502.45 -105.509
- Q7_Q7_2 1 4.754 507.21 -101.756
- factor(group) 3 9.044 511.50 -100.609
- log_count 1 84.566 587.02 -12.465
Step: AIC=-106.31
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2
Df Sum of Sq RSS AIC
<none> 503.44 -106.308
- factor(group) 3 9.194 512.64 -101.250
- Q7_Q7_2 1 7.287 510.73 -99.527
- log_count 1 86.242 589.68 -11.697
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Coefficients:
(Intercept) factor(group)1 factor(group)2 factor(group)3 log_count Q7_Q7_2
-0.1775 0.1283 0.2540 0.3210 0.3783 0.1099
# Group plus the four Q* covariates, without log_count.
mod <- lm(
  log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 +
Q7_Q7_1 + Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.1605 -0.4467 0.3399 0.7208 1.4666
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.19601 0.08135 -2.409 0.01627 *
factor(group)1 0.19557 0.11423 1.712 0.08741 .
factor(group)2 0.22325 0.11719 1.905 0.05726 .
factor(group)3 0.35026 0.11211 3.124 0.00187 **
Q10 0.02321 0.04301 0.540 0.58961
Q8_Q8_1 0.07471 0.04438 1.683 0.09280 .
Q7_Q7_1 -0.07583 0.04981 -1.522 0.12849
Q7_Q7_2 0.13346 0.05105 2.614 0.00917 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9845 on 603 degrees of freedom
Multiple R-squared: 0.04181, Adjusted R-squared: 0.03069
F-statistic: 3.759 on 7 and 603 DF, p-value: 0.0005336
Nest Phase in Group [ Linear Mixed Model ]
# Recode user2, stage and group as nominal factors.
factor_cols <- c("user2", "stage", "group")
df[factor_cols] <- lapply(df[factor_cols], factor)
# Descriptive statistics of the outcome for every group x stage cell.
library(plyr)

# Min / quartiles / mean / max per cell.
ddply(
  df, ~ group * stage,
  function(data) summary(data$log_relational_novelty)
)

# Mean and standard deviation per cell.
ddply(
  df, ~ group * stage, summarise,
  log_relational_novelty.mean = mean(log_relational_novelty),
  log_relational_novelty.sd = sd(log_relational_novelty)
)
# Histograms of the outcome, one per group x stage cell. The loop replaces
# sixteen hand-written hist() calls; group varies slowest, so the plots appear
# in the same order as before, and each plot gets a readable title.
for (g in levels(factor(df$group))) {
  for (s in levels(factor(df$stage))) {
    cell <- df$log_relational_novelty[df$group == g & df$stage == s]
    if (length(cell) == 0) {
      next # no observations in this cell, nothing to bin
    }
    hist(
      cell,
      main = paste0("log_relational_novelty (group ", g, ", stage ", s, ")"),
      xlab = "log_relational_novelty"
    )
  }
}

# Distribution of the outcome in every group x stage cell.
boxplot(
  log_relational_novelty ~ group * stage,
  data = df,
  xlab = "Group.Stage",
  ylab = "log_relational_novelty"
)

# Interaction plot of the cell means. No ylim is forced: the outcome is
# standardized (mean 0), so a lower limit of 0 would clip every cell whose
# mean is negative. The default limits span the range of the cell means.
with(df, interaction.plot(group, stage, log_relational_novelty))

# library for LMM we will use on relational novelty
library(lme4)
library(lmerTest)
library(car)
Variability in relational novelty is much higher between individual users than between stages/phases. The remaining (residual) variability of about 0.2035 comes from factors other than individual users and stage. factor(group)1 has higher relational novelty than group 0 by about 0.2099, and factor(group)3 has higher relational novelty than group 0 by about 0.3765; note that neither difference reaches the 5% significance level in the model summary.
# Mixed model: group as fixed effect, with random intercepts for user2 and
# for stage, used to compare the variance attributed to each.
var.model <- lmer(
  log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage),
  data = df
)
summary(var.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage)
Data: df
REML criterion at convergence: 1203.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.2978 -0.2484 0.0522 0.2410 3.5006
Random effects:
Groups Name Variance Std.Dev.
user2 (Intercept) 0.789432 0.88850
stage (Intercept) 0.002792 0.05284
Residual 0.203477 0.45108
Number of obs: 611, groups: user2, 157; stage, 4
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.2208 0.1530 146.2072 -1.443 0.151
factor(group)1 0.2099 0.2103 152.3350 0.998 0.320
factor(group)2 0.2659 0.2097 154.1335 1.268 0.207
factor(group)3 0.3765 0.2079 152.4574 1.812 0.072 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) fct()1 fct()2
factr(grp)1 -0.706
factr(grp)2 -0.708 0.515
factr(grp)3 -0.714 0.519 0.521
# Reduced model: log_count and the four Q* covariates, no group factor.
reduced.model <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(reduced.model)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3239 -0.3892 0.2683 0.6427 1.5709
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.517e-16 3.714e-02 0.000 1.00000
log_count 3.765e-01 3.764e-02 10.003 < 2e-16 ***
Q7_Q7_1 -5.352e-02 4.615e-02 -1.160 0.24666
Q7_Q7_2 1.358e-01 4.707e-02 2.885 0.00406 **
Q8_Q8_1 6.056e-02 4.129e-02 1.467 0.14301
Q10 -3.134e-02 3.974e-02 -0.788 0.43073
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9179 on 605 degrees of freedom
Multiple R-squared: 0.1643, Adjusted R-squared: 0.1574
F-statistic: 23.78 on 5 and 605 DF, p-value: < 2.2e-16
# Full model: the reduced model's predictors plus the group factor.
full.model <- lm(
  log_relational_novelty ~ factor(group) + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(full.model)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2745 -0.4240 0.2415 0.6170 1.4761
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17463 0.07542 -2.315 0.02093 *
factor(group)1 0.13181 0.10606 1.243 0.21441
factor(group)2 0.24455 0.10863 2.251 0.02474 *
factor(group)3 0.31475 0.10396 3.028 0.00257 **
log_count 0.37561 0.03755 10.003 < 2e-16 ***
Q7_Q7_1 -0.05097 0.04623 -1.102 0.27070
Q7_Q7_2 0.12593 0.04732 2.661 0.00799 **
Q8_Q8_1 0.05415 0.04118 1.315 0.18902
Q10 -0.02165 0.04012 -0.540 0.58955
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9124 on 602 degrees of freedom
Multiple R-squared: 0.1784, Adjusted R-squared: 0.1675
F-statistic: 16.34 on 8 and 602 DF, p-value: < 2.2e-16
# F-test between the nested models: does adding factor(group) to the
# covariate-only model (reduced.model) improve the fit (full.model)?
anova(reduced.model, full.model)
Analysis of Variance Table
Model 1: log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 605 509.79
2 602 501.19 3 8.603 3.4445 0.01652 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Box plots of the outcome for every stage x group combination.
# The four colours are recycled across the boxes (presumably one per stage,
# since stage is the first factor in the formula).
boxplot(
  log_relational_novelty ~ stage * group,
  data = df,
  col = c("white", "lightgray", "blue", "green")
)

# Multicollinearity diagnostics (VIF and tolerance) for the full linear model.
# Namespace-qualified so the call does not rely on the performance package
# having been attached by some other chunk.
performance::check_collinearity(full.model)
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
factor(group) 1.09 [1.03, 1.25] 1.04 0.92 [0.80, 0.97]
log_count 1.03 [1.00, 1.41] 1.02 0.97 [0.71, 1.00]
Q7_Q7_1 1.57 [1.42, 1.76] 1.25 0.64 [0.57, 0.70]
Q7_Q7_2 1.64 [1.49, 1.85] 1.28 0.61 [0.54, 0.67]
Q8_Q8_1 1.24 [1.15, 1.39] 1.11 0.80 [0.72, 0.87]
Q10 1.18 [1.10, 1.32] 1.09 0.85 [0.76, 0.91]
library(car)
# Variance inflation factors for the full model. Because factor(group) spans
# several degrees of freedom, the result is reported as generalized VIFs
# (GVIF) together with the df-adjusted GVIF^(1/(2*Df)) column.
vif(full.model)
GVIF Df GVIF^(1/(2*Df))
factor(group) 1.086998 3 1.014000
log_count 1.033040 1 1.016386
Q7_Q7_1 1.566162 1 1.251464
Q7_Q7_2 1.640509 1 1.280824
Q8_Q8_1 1.242569 1 1.114706
Q10 1.179095 1 1.085861
# Variance inflation factors for the model without the group factor, for
# comparison with the full model's values.
vif(reduced.model)
log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1 Q10
1.025572 1.541842 1.603686 1.234487 1.143380
library(multcomp)
library(lsmeans)
#summary(glht(full.model, lsm(pairwise ~ group / stage)), test = adjusted(type='holm'))
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBRMTAgaXMgYXNwaXJhdGlvbnMgCiMgUTdfN18xIGlzIGphdmFzY3JpcHQgc2tpbGxzIAojIFE3XzdfMiBpcyBtYXNodXAgc2tpbGxzIAojIFE4XzhfMSBpcyBnZW5lcmFsIHByb2dyYW1taW5nIHNraWxscyAKCmBgYHtyfQojIHJlYWRpbmcgaW5wdXQgCmRmPC1yZWFkLmNzdigiaW5wdXQvY29tbWl0c19ub3ZlbHR5LmNzdiIsIGhlYWRlciA9VFJVRSwgc2VwPSIsIikKZGYgPC0gZGZbY29tcGxldGUuY2FzZXMoZGYpLCBdICAKZGYKYGBgCgpgYGB7cn0KZGYkZ3JvdXAgPSBmYWN0b3IoZGYkZ3JvdXApCmBgYAoKCmBgYHtyfQojIGNyZWF0ZSBuZXcgY29sdW1ucyBjYWxsZWQgbG9nIHJlbGF0aW9uYWwgbm92ZWx0eQpkZiRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IDwtIGxvZyhkZiRzaW1pbGFyaXR5KzEpIApkZiRsb2dfY291bnQgPC0gbG9nKGRmJGNvdW50KzEpIApkZgpgYGAKCgpgYGB7cn0KIyBzdGFuZGFyZGl6aW5nIHZhcmlhYmxlcyBmb3Igc2tpbGxzIGFuZCBhc3BpcmF0aW9ucy4gCmNvbHMgPC0gYygiUTdfUTdfMSIsICJRN19RN18yIiwgIlE4X1E4XzEiLCAiUTEwIiwgImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiLCAibG9nX2NvdW50IikKZGZbY29sc10gPC0gc2NhbGUoZGZbY29sc10pCmRmCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0obG9nX2NvdW50IH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkvc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICogc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCgoKYGBge3J9CiMgUHJvcG9zZWQgbW9kZWwgYnkgc3RlcHdpc2UgcmVncmVzc2lvbgpsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgbG9nX2NvdW50ICsgUTdfUTdf
MiAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCkFJQyhtb2QpCkJJQyhtb2QpCmBgYAoKYGBge3J9CiMgd2l0aG91dCB0aGUgZmFjdG9yICggZ3JvdXAgKSBhbmQgd2l0aCBhbGwgY29uZm91bmRpbmcgdmFyaWFibGVzIApsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpBSUMobW9kKQpCSUMobW9kKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMSA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kLjEpCkFJQyhtb2QuMSkKQklDKG1vZC4xKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMiA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18yICwgZGF0YSA9IGRmKQpzdW1tYXJ5KG1vZC4yKQpBSUMobW9kLjIpCkJJQyhtb2QuMikKYGBgCgpgYGB7cn0KIyBtb2RlbCB3aXRoIGFuZCB3aXRob3V0IGdyb3VwcyBhcmUgdmVyeSBkaWZmZXJlbnQgKCBzaWduaWZpY2FudCApCmFub3ZhKG1vZC4xLCBtb2QuMikKYGBgCgoKYGBge3J9CmxpYnJhcnkoQUxTTSkKc3RlcChsbShsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YT1kZiksCm1ldGhvZD0iYm90aCIsIHRyYWNlID0gMSApCmBgYAoKCgoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCiMjIE5lc3QgUGhhc2UgaW4gR3JvdXAgWyBMaW5lYXIgTWl4ZWQgTW9kZWwgXQoKYGBge3J9CiMgY29udmVydCB0byBub21pbmFsIGZhY3RvcgpkZiR1c2VyMiA9IGZhY3RvcihkZiR1c2VyMikKZGYkc3RhZ2UgPSBmYWN0b3IoZGYkc3RhZ2UpCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpgYGAKCmBgYHtyfQojIGV4cGxvcmUgdGhlIGRhdGEgYW5kIHRoZWlyIGxldmVscyAKbGlicmFyeShwbHlyKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBmdW5jdGlvbihkYXRhKSBzdW1tYXJ5KGRhdGEkbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkgKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBzdW1tYXJpc2UsIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkubWVhbj1tZWFuKGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpLCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5LnNkID0gc2QobG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkpCmBgYApgYGB7cn0KIyBoaXN0b2dyYW1zIGZvciB0d28gZmFjdG9ycwpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxf
bm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAwICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMCAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gMSxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMSAmIGRmJHN0YWdlID09IDIsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDEgJiBkZiRzdGFnZSA9PSAzLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gNCxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDEsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDIgJiBkZiRzdGFnZSA9PSAyLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAyICYgZGYkc3RhZ2UgPT0gMyxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDQsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAzICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMyAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKYm94cGxvdChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZ3JvdXAgKiBzdGFnZSwgZGF0YSA9IGRmLCB4bGFiPSJHcm91cC5TdGFnZSIsIHlsYWI9ImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiKQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBzdGFnZSwgbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSwgeWxpbT1jKDAsIG1heChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KSkpKSAjIGludGVyYWN0aW9uIHBsb3QKYGBgCmBgYHtyfQojIGxpYnJhcnkgZm9yIExNTSB3ZSB3aWxsIHVzZSBvbiByZWxhdGlvbmFsIG5vdmVsdHkgCgpsaWJyYXJ5KGxtZTQpCmxpYnJhcnkobG1lclRlc3QpCmxpYnJhcnkoY2FyKQpgYGAKCiMgc2V0IHN1bS10by16ZXJvIGNvbnRyYXN0cyBmb3IgdGhlIEFub3ZhIGNlbGxzIAoKYGBge3J9CmNvbnRyYXN0cyhkZiRncm91cCkgPD0gImNvbnRyLnN1bSIKY29udHJhc3RzKGRmJHN0YWdlKSA8PSAiY29udHIuc3VtIgpgYGAKCgpgYGB7cn0KIyBzdGFnZSBuZXN0ZWQgd2l0aGluIGdyb3VwIApmdWxsLm1vZGVsID0g
bG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGdyb3VwL3N0YWdlICsgKDEgfCB1c2VyMiApLCBkYXRhID0gZGYsIFJFTUwgPSBGQUxTRSkKQW5vdmEoZnVsbC5tb2RlbCwgdHlwZT0zLCB0ZXN0LnN0YXRpc3RpY3M9IkYiKQpmdWxsLm1vZGVsCmBgYApgYGB7cn0KbGlicmFyeShwZXJmb3JtYW5jZSkKCmNoZWNrX2NvbGxpbmVhcml0eShmdWxsLm1vZGVsKQoKCmBgYAojIHZhcmlhYmlsaXR5IGlzIHZlcnkgbXVjaCBoaWdoZXIgaW4gaW5kaXZpZHVhbCB1c2VyIGFuZCB0aGFuIGluIHN0YWdlcy9waGFzZXMgZm9yIHJlbGF0aW9uYWwgbm92ZWx0eS4gVGhlIHJlbWFpbmluZyB2YXJpYWJpbGl0eSBvZiAgMC4yMDM1MDIgY29tZXMgZnJvbSBmYWN0b3Igb3RoZXIgdGhhbiBpbmRpdmlkdWFsIHVzZXJzIGFuZCBzdGFnZS4gZmFjdG9yKGdyb3VwKTEgIGhhcyBoaWdoZXIgcmVsYXRpb25hbCBub3ZlbHR5IHRoYW4gZ3JvdXAgMCBieSBhYm91dCAgMC4yMDk5IC4gZmFjdG9yKGdyb3VwKTMgaGFzIGhpZ2hlciByZWxhdGlvbmFsIG5vdmVsdHkgdGhhbiBncm91cCAwIGJ5IGFib3V0IDAuMzc2NS4gCgpgYGB7cn0KdmFyLm1vZGVsID0gbG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyAoIDEgfCB1c2VyMikgKyAoIDEgfCBzdGFnZSksIGRhdGEgPSBkZikKc3VtbWFyeSh2YXIubW9kZWwpCmBgYAoKCmBgYHtyfQpyZWR1Y2VkLm1vZGVsID0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAsIGRhdGEgPSBkZikKc3VtbWFyeShyZWR1Y2VkLm1vZGVsKQpgYGAKCgpgYGB7cn0KZnVsbC5tb2RlbCA9IGxtKCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YSA9IGRmKQpzdW1tYXJ5KGZ1bGwubW9kZWwpCmBgYAoKYGBge3J9CmFub3ZhKHJlZHVjZWQubW9kZWwsIGZ1bGwubW9kZWwpCmBgYAoKCmBgYHtyfQpib3hwbG90KGxvZ19yZWxhdGlvbmFsX25vdmVsdHl+IHN0YWdlKmdyb3VwLApjb2w9Yygid2hpdGUiLCJsaWdodGdyYXkiLCAiYmx1ZSIsICJncmVlbiIpLGRmKQpgYGAKCmBgYHtyfQpjaGVja19jb2xsaW5lYXJpdHkoZnVsbC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShjYXIpCgp2aWYoZnVsbC5tb2RlbCkKYGBgCmBgYHtyfQp2aWYocmVkdWNlZC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShtdWx0Y29tcCkKbGlicmFyeShsc21lYW5zKQojc3VtbWFyeShnbGh0KGZ1bGwubW9kZWwsIGxzbShwYWlyd2lzZSB+IGdyb3VwIC8gc3RhZ2UpKSwgdGVzdCA9IGFkanVzdGVkKHR5cGU9J2hvbG0nKSkKYGBgCgo=